In [ ]:
# Execute the preceding "Functions" notebook inside this kernel.
# NOTE(review): presumably this is what brings `re`, `pd`, `randint` and helpers
# such as `getAllSessions` into scope for the cells below — TODO confirm.
%run "../Functions/0.4 GF correct answers.ipynb"
# Progress marker printed when this notebook is (re-)run.
print("1. Game sessions")
In [ ]:
# RedMetrics
# user id in format localplayerguid = '"8d352896-a3f1-471c-8439-0f426df901c1"'
# source: https://stackoverflow.com/questions/42047994/regex-how-to-find-a-guid-in-a-long-string
# Anchored pattern: the whole string must be a lowercase, hyphenated GUID.
GUIDpattern = '^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$'


def isGUIDFormat( guid ):
    """Tell whether `guid` is a string matching `GUIDpattern`.

    Returns the `re.Match` object (truthy) on success and None otherwise, so
    the result is meant to be used in a boolean context.

    Non-string values (None, or the float NaN pandas uses for missing ids)
    are reported as "not a GUID" instead of raising TypeError from `re`.
    """
    if not isinstance(guid, str):
        return None
    return re.search(GUIDpattern, guid)
def getRandomRedMetricsGUID( _rmDF ):
    """Pick a random GUID-formatted user id from the 'userId' column of `_rmDF`.

    Missing ids are dropped and duplicates collapsed first, so each distinct
    valid id is equally likely to be drawn.

    Raises:
        ValueError: if the column contains no GUID-formatted id. Retrying
            random draws until one matches would never terminate in that case.
    """
    _uniqueUsers = _rmDF['userId'].dropna().unique()
    # Filter once up front instead of re-drawing until a valid id shows up.
    _validGuids = [_user for _user in _uniqueUsers if isGUIDFormat(_user)]
    if not _validGuids:
        raise ValueError("no GUID-formatted userId found in the provided DataFrame")
    # NOTE(review): assumes `randint` has an inclusive upper bound
    # (random.randint), as the `- 1` suggests — TODO confirm.
    _userIndex = randint(0, len(_validGuids) - 1)
    return _validGuids[_userIndex]
def getRandomSessionGUID( _rmDF, _userId = '' ):
    """Pick a random session id belonging to a user.

    Parameters:
        _rmDF: DataFrame with (at least) 'userId' and 'sessionId' columns.
        _userId: user whose sessions are sampled. When it is not
            GUID-formatted (e.g. the default ''), a random user is drawn
            from `_rmDF` instead.

    Returns:
        One of the user's session ids, or "" (after printing a notice) when
        the user has no session.
    """
    rmId = _userId
    if not isGUIDFormat(_userId):
        # The DataFrame must be forwarded: the helper has no default for it.
        rmId = getRandomRedMetricsGUID(_rmDF)

    _uniqueSessions = getUserSessions(_rmDF, rmId)
    _sessionsCount = len(_uniqueSessions)
    if _sessionsCount == 0:
        print("no sessions for userId " + rmId)
        return ""

    # NOTE(review): assumes `randint` has an inclusive upper bound
    # (random.randint), as the `- 1` suggests — TODO confirm.
    _sessionIndex = randint(0, _sessionsCount - 1)
    return _uniqueSessions.iloc[_sessionIndex]
In [ ]:
def getUserSessions( _rmDF, userId):
    """Return the distinct, non-missing session ids recorded for `userId`.

    The result is a Series of 'sessionId' values that keeps the index labels
    of the first row in which each session id appears.
    """
    ownedByUser = _rmDF['userId'] == userId
    sessionIds = _rmDF.loc[ownedByUser, 'sessionId']
    distinctIds = sessionIds.drop_duplicates()
    return distinctIds.dropna(how='any')
def getSessionsCount(_rmDF, userId):
    """Return how many sessions `getUserSessions` reports for `userId`."""
    userSessions = getUserSessions(_rmDF, userId)
    return len(userSessions)
def getUserSessionsCounts( _rmDF ):
    """Return a DataFrame of per-user row counts, largest first.

    The rows returned by `getAllSessions(_rmDF, True)` are grouped by
    'userId'; the group sizes are exposed in a 'counts' column and sorted in
    descending order.
    NOTE(review): the meaning of the `True` flag is defined by
    `getAllSessions`, which is not visible here — TODO confirm.
    """
    sessions = getAllSessions( _rmDF, True )
    sizePerUser = sessions.groupby('userId').size()
    countsTable = sizePerUser.reset_index(name='counts')
    return countsTable.sort_values(by='counts', ascending=False)
In [ ]:
def getFirstEventDate( _userId, _rmDF ):
    """Return the 'userTime' of the user's first significant event.

    `_userId` is assumed to be in RedMetrics format. An event is treated as
    significant when its 'section' is set, which discards events such as
    'start' and 'configure'.

    Returns a pandas Timestamp; when no significant event exists, the result
    is `pd.Timestamp.max` localized to UTC.
    """
    userSessions = getUserSessions(_rmDF, _userId)
    earliestSoFar = pd.Timestamp.max.tz_localize('utc')
    for sessionId in userSessions:
        sessionRows = _rmDF[_rmDF['sessionId'] == sessionId]
        significant = sessionRows.dropna(subset=['section'])
        if len(significant) == 0:
            continue
        sessionStart = significant['userTime'].min()
        # min(a, b) keeps `a` unless `b < a`, i.e. the running minimum.
        earliestSoFar = min(earliestSoFar, sessionStart)
    return earliestSoFar
In [ ]:
def getBoundingEventDates( _userId, _rmDF ):
    """Return the 'userTime' of the user's first and last significant events.

    `_userId` is assumed to be in RedMetrics format. An event is treated as
    significant when its 'section' is set, which discards events such as
    'start' and 'configure'.

    Returns a `(first, last)` pair of pandas Timestamps; when no significant
    event exists the pair is `(pd.Timestamp.max, pd.Timestamp.min)`, both
    localized to UTC.
    """
    userSessions = getUserSessions(_rmDF, _userId)
    lowerBound = pd.Timestamp.max.tz_localize('utc')
    upperBound = pd.Timestamp.min.tz_localize('utc')
    for sessionId in userSessions:
        sessionRows = _rmDF[_rmDF['sessionId'] == sessionId]
        significant = sessionRows.dropna(subset=['section'])
        if len(significant) == 0:
            continue
        eventTimes = significant['userTime']
        sessionStart = eventTimes.min()
        sessionEnd = eventTimes.max()
        # min/max keep the running bound unless the new value is strictly
        # smaller / larger.
        lowerBound = min(lowerBound, sessionStart)
        upperBound = max(upperBound, sessionEnd)
    return (lowerBound, upperBound)